Specify the tissue of interest, run the boilerplate code which sets up the functions and environment, load the tissue object.
tissue_of_interest = "Lung"
library(here)
## here() starts at /home/rstudio/tabula-muris
source(here("00_data_ingest", "02_tissue_analysis_rmd", "boilerplate.R"))
## Loading required package: ggplot2
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
## Loading required package: Matrix
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 1.3.4 ✔ purrr 0.2.4
## ✔ tidyr 0.7.2 ✔ stringr 1.2.0
## ✔ readr 1.1.1 ✔ forcats 0.2.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ cowplot::ggsave() masks ggplot2::ggsave()
## ✖ dplyr::lag() masks stats::lag()
tiss <- load_tissue_facs(tissue_of_interest)
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
Visualize top genes in principal components
PCElbowPlot(object = tiss)
# Set number of principal components.
n.pcs = 20
# Set resolution
res.used <- 3
tiss <- FindClusters(object = tiss, reduction.type = "pca", dims.use = 1:n.pcs,
resolution = res.used, print.output = 0, save.SNN = TRUE)
tiss <- RunTSNE(object = tiss, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
# note that you can set do.label=T to help label individual clusters
TSNEPlot(object = tiss, do.label = T)
# Batch and animal effects
TSNEPlot(object = tiss, do.return = TRUE, group.by = "plate.barcode")
TSNEPlot(object = tiss, do.return = TRUE, group.by = "mouse.id")
Check expression of genes of interset.
Dotplots let you see the intensity of expression and the fraction of cells expressing for each of your genes of interest.
How big are the clusters?
table(tiss@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 148 135 125 121 120 113 101 90 85 78 68 65 62 57 53 47 42 40
## 18 19 20 21 22
## 39 37 35 30 25
Which markers identify a specific cluster?
#clust.markers <- FindMarkers(object = tiss, ident.1 = 3, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
#print(x = head(x= clust.markers, n = 10))
At a coarse level, we can use canonical markers to match the unbiased clustering to known cell types:
# stash current cluster IDs
tiss <- StashIdent(object = tiss, save.name = "cluster.ids")
# enumerate current cluster IDs and the labels for them
cluster.ids <- 0:22
free_annotation <- c(NA,
NA,
NA,
NA,
NA,
"alveolar epithelial type 1 cells, alveolar epithelial type 2 cells, club cells, and basal cells",
NA,
"invading monocytes",
"dendritic cells, alveolar macrophages, and interstital macrophages",
NA,
NA,
"circulating monocytes",
NA,
NA,
NA,
NA,
NA,
"lung neuroendocrine cells and unknown cells",
NA,
NA,
"mast cells and unknown immune cells",
NA,
"multiciliated cells")
cell_ontology_class <-c("lung endothelial cell",
"lung endothelial cell",
"stromal cell",
"lung endothelial cell",
"lung endothelial cell",
"epithelial cell of lung",
"stromal cell",
"classical monocyte",
"myeloid cell",
"stromal cell",
"lung endothelial cell",
"monocyte",
"lung endothelial cell",
"B cell",
"T cell",
"stromal cell",
"stromal cell",
NA,
"lung endothelial cell",
"natural killer cell",
"leukocyte",
"stromal cell",
"ciliated columnar cell of tracheobronchial tree")
tiss = stash_annotations(tiss, cluster.ids, free_annotation, cell_ontology_class)
data.frame(cell_ontology_class, free_annotation)
## cell_ontology_class
## 1 lung endothelial cell
## 2 lung endothelial cell
## 3 stromal cell
## 4 lung endothelial cell
## 5 lung endothelial cell
## 6 epithelial cell of lung
## 7 stromal cell
## 8 classical monocyte
## 9 myeloid cell
## 10 stromal cell
## 11 lung endothelial cell
## 12 monocyte
## 13 lung endothelial cell
## 14 B cell
## 15 T cell
## 16 stromal cell
## 17 stromal cell
## 18 <NA>
## 19 lung endothelial cell
## 20 natural killer cell
## 21 leukocyte
## 22 stromal cell
## 23 ciliated columnar cell of tracheobronchial tree
## free_annotation
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 alveolar epithelial type 1 cells, alveolar epithelial type 2 cells, club cells, and basal cells
## 7 <NA>
## 8 invading monocytes
## 9 dendritic cells, alveolar macrophages, and interstital macrophages
## 10 <NA>
## 11 <NA>
## 12 circulating monocytes
## 13 <NA>
## 14 <NA>
## 15 <NA>
## 16 <NA>
## 17 <NA>
## 18 lung neuroendocrine cells and unknown cells
## 19 <NA>
## 20 <NA>
## 21 mast cells and unknown immune cells
## 22 <NA>
## 23 multiciliated cells
TSNEPlot(object = tiss, do.return = TRUE, group.by = "free_annotation", do.label = T, no.legend = T)
## Warning: Removed 1 rows containing missing values (geom_text).
TSNEPlot(object = tiss, do.return = TRUE, group.by = "cell_ontology_class", do.label = T, no.legend = T)
## Warning: Removed 1 rows containing missing values (geom_text).
subtiss = SubsetData(tiss, ident.use = c(5,17,22))
subtiss <- subtiss %>% ScaleData() %>%
FindVariableGenes(do.plot = TRUE, x.high.cutoff = Inf, y.cutoff = 0.5) %>%
RunPCA(do.print = FALSE) %>% ProjectPCA(do.print = FALSE)
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
PCHeatmap(object = subtiss, pc.use = 1:9, cells.use = 100, do.balanced = TRUE, label.columns = FALSE, num.genes = 8)
PCElbowPlot(subtiss)
sub.n.pcs = 10
sub.res.use = 1
subtiss <- subtiss %>% FindClusters(reduction.type = "pca", dims.use = 1:sub.n.pcs,
resolution = sub.res.use, print.output = 0, save.SNN = TRUE) %>%
RunTSNE(dims.use = 1:sub.n.pcs, seed.use = 10, perplexity=30)
TSNEPlot(object = subtiss, do.label = T, pt.size = 1.2, label.size = 4)
Epithelial clusters (besides Type II and Ciliated, where n is sufficently high) do not align with known marker gene expression because of low numbers. Cannot subcluster further!
subtiss2 = SubsetData(tiss, ident.use = c(8, 7, 11))
subtiss2 <- subtiss2 %>% ScaleData() %>%
FindVariableGenes(do.plot = TRUE, x.high.cutoff = Inf, y.cutoff = 1, x.low.cutoff = 0.5) %>%
RunPCA(do.print = FALSE) %>% ProjectPCA(do.print = FALSE)
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
PCHeatmap(object = subtiss2, pc.use = 1:9, cells.use = 100, do.balanced = TRUE, label.columns = FALSE, num.genes = 8)
PCElbowPlot(subtiss2)
sub2.n.pcs = 13
sub2.res.use = 2
subtiss2 <- subtiss2 %>% FindClusters(reduction.type = "pca", dims.use = 1:sub2.n.pcs,
resolution = sub2.res.use, print.output = 0, save.SNN = TRUE) %>%
RunTSNE(dims.use = 1:sub2.n.pcs, seed.use = 10, perplexity=30)
TSNEPlot(object = subtiss2, do.label = T, pt.size = 1.2, label.size = 4)
TSNEPlot(object = tiss, do.label = TRUE, pt.size = 0.5, group.by='free_annotation')
## Warning: Removed 1 rows containing missing values (geom_text).
Myeloid clusters (besides dendritic and two monocyte clusters, where n is sufficently high) do not align with known marker gene expression because of low numbers. Cannot subcluster further!
When you save the annotated tissue, please give it a name.
filename = here('00_data_ingest', '04_tissue_robj_generated',
paste0("facs_", tissue_of_interest, "_seurat_tiss.Robj"))
print(filename)
## [1] "/home/rstudio/tabula-muris/00_data_ingest/04_tissue_robj_generated/facs_Lung_seurat_tiss.Robj"
save(tiss, file=filename)
# To reload a saved object
# filename = here('00_data_ingest', '04_tissue_robj_generated',
# paste0("facs_", tissue_of_interest, "_seurat_tiss.Robj"))
# load(file=filename)
Write the cell ontology and free annotations to CSV.
save_annotation_csv(tiss, tissue_of_interest, "facs")